####
# Analysis SHP data
#####

rm(list = ls())

library(data.table)
library(lfe)
library(tidyverse)
library(here)
library(qs)
library(haven)
library(texreg)
source(here("code", "functions.R"))

# Load data ---------------------------------------------------------------
shp <- qread(here("data", "shp.qs")) 

# Functions that run the regressions --------------------

#Define a function that standardizes the variables and runs the regressions for the entire SHP sample
shp_regression_function <- function(data = shp, vars, ticino = FALSE) {
  models <- list() # Initialize an empty list to store the models
  
  for (var in vars) {
    # Standardize the variable
    standardized_var <- paste(var, "std", sep = "_")
    data[[standardized_var]] <- data[[var]]
    data[[standardized_var]][which(data[[standardized_var]] < 0)] <- NA
    data[[standardized_var]] <- (data[[standardized_var]] - mean(data[[standardized_var]], na.rm = TRUE)) / sd(data[[standardized_var]], na.rm = TRUE)
    
    # Filter data for the regression
    reg_data <- data %>% 
      filter(!is.na(data[[standardized_var]]), !is.na(weight_i_pop), travelMUNmin <= 30, BR == 1) 
    
    ti <- ""
    if(ticino){ti = "_ti"}
    
    unit_fe <- "idpers"
    
    # Run the regression
    model_formula <- as.formula(paste0(standardized_var, " ~ transBorder15 + freeBorder15 | bfs19 + year + ", unit_fe, " | 0 | bfs19"))
    models[[paste0("mod", "_", var, ti)]] <- felm(formula = model_formula,
                                                   data = reg_data,
                                                   weights = reg_data$weight_i_pop)
  }
  
  return(models)
}


#Define a function that standardizes the variables and runs the regressions for each political interest group for the Ticino sample
political_interest_regression_function <- function(var_name, data, canton = "") {
  
  # Standardize the variable
  var_std <- paste(var_name, "std", sep = "_")
  data[[var_std]] <- data[[var_name]]
  data[[var_std]][which(data[[var_std]] < 0)] <- NA
  data[[var_std]] <- (data[[var_std]] - mean(data[[var_std]], na.rm = TRUE)) / sd(data[[var_std]], na.rm = TRUE)
  
  # Define the unit FE
  unit_fe <- "idpers"
  
  # Define the regression formula
  model_formula <- as.formula(paste0(var_std, "~ transBorder15 + freeBorder15 | bfs19 + year + ", unit_fe, " | 0 | bfs19"))
  
  # Initialize an empty list to store results
  results <- list()
  
  # Iterate over political interest levels
  interest_levels <- c("low", "med", "high")
  
  if(canton == "TI"){canton = "_ti"}
  
  for (interest_level in interest_levels) {
    # Time-varying groups
    data_filtered <- data %>%
      filter(!is.na(!!sym(var_std)), !is.na(weight_i_pop), travelMUNmin <= 30, BR == 1) %>%
      filter(!!sym(paste("polit_interest", interest_level, sep = "_")) == 1)
    
    results[[paste0("mod", "_", var_name, "_", interest_level, canton)]] <- felm(model_formula, 
                                                                               weights = data_filtered$weight_i_pop, 
                                                                               data = data_filtered)
    
    # Time-invariant groups
    data_filtered <- data %>%
      filter(!is.na(!!sym(var_std)), !is.na(weight_i_pop), travelMUNmin <= 30, BR == 1) %>%
      filter(polit_interest_cat == interest_level)
    
    results[[paste0("mod_stable", "_", var_name, "_", interest_level, canton)]] <- felm(model_formula, 
                                                                                      weights = data_filtered$weight_i_pop, 
                                                                                      data = data_filtered)
  }
  
  return(results)
  
}


# Variables of interest and run regressions ---------------------------------

#Variables of interest
vars <- c("satis_fin", #Higher is more financial satisfaction
          "cmj_risk_unempl", #Higher is more risk of unemployment
          #"housingexp", #Higher is accommodation too expensive. This is done below because housing expenses is measured at the household level
          "opin_army", #Higher is favoring no army
          "trust_gov", #Higher is confident in federal government
          "satis_demo", #Higher is satisfied with democracy
          "opin_eu", #Higher is favoring staying outside EU (1 is in favor of joining EU)
          "opin_foreign",
          "opin_taxation"
          ) #Higher is more opportunities for Swiss (less equality btwn Swiss and foreigners)

# Run the function
models <- shp_regression_function(shp, vars)

# Assign each model to a variable in the global environment
list2env(models, envir = .GlobalEnv)
rm(models)


#Housing expenses (data is at household level)
shp_hh <- shp %>% 
  group_by(idhouse, year) %>% 
  summarise(transBorder15 = mean(transBorder15),
            freeBorder15 = mean(freeBorder15),
            bfs19 = mean(bfs19),
            housingexp = mean(housingexp),
            weight_h_pop = mean(weight_h_pop),
            travelMUNmin = mean(travelMUNmin),
            BR = mean(BR)
            )


shp_hh$housingexp[which(shp_hh$housingexp < 0)] <- NA
shp_hh$housingexp_std <- (shp_hh$housingexp - mean(shp_hh$housingexp, na.rm = T)) / sd(shp_hh$housingexp, na.rm = T)

shp_hh <- shp_hh %>% 
  filter(!is.na(housingexp_std), !is.na(weight_h_pop), travelMUNmin <= 30, BR == 1) 

mod_housingexp <- felm(housingexp_std ~ transBorder15 + freeBorder15 | bfs19 + year + idhouse | 0 | bfs19, data = shp_hh, 
                       weights = shp_hh$weight_h_pop)




# Opinion Foreigners by political interest in Ticino ----------------------------------------------------

## Groups for heterogeneity analysis in Ticino ----------------------------------------------------

#Higher is more opportunities for Swiss (less equality)
shp_ti <- shp %>% filter(canton_name == "TI")




shp_ti$polit_interest[which(shp_ti$polit_interest < 0)] <- NA
quants_ti <- quantile(shp_ti$polit_interest, na.rm = T, probs = c(0, .33, .67, 1))

#Time-invariant groups
shp_ti_polit_int_avg <- shp_ti %>% 
  group_by(idpers) %>% 
  summarise(polit_interest_avg = mean(polit_interest, na.rm = T),
            polit_interest_cat = case_when(polit_interest_avg <= 3 ~ "low",
                                           polit_interest_avg <= 6 ~ "med",
                                           polit_interest_avg <= 10 ~ "high",
                                           TRUE ~ NA_character_))

shp_ti_polit_int_avg$polit_interest_cat %>% table %>% prop.table #Groups are roughly balanced. Slightly more low than others

#Define time-varying groups of political interest
shp_ti <- shp_ti %>%
  mutate(polit_interest_low = case_when(polit_interest <= 3 ~ 1,
                                        polit_interest > 3 ~ 0,
                                        TRUE ~ NA_real_),
         polit_interest_med = case_when(polit_interest > 3 & polit_interest <= 6 ~ 1,
                                        polit_interest <= 3 | polit_interest > 6 ~ 0,
                                        TRUE ~ NA_real_),
         polit_interest_high = case_when(polit_interest > 6 ~ 1,
                                        polit_interest <= 6 ~ 0,
                                        TRUE ~ NA_real_))

shp_ti$polit_interest_low %>% table %>% prop.table
shp_ti$polit_interest_med %>% table %>% prop.table
shp_ti$polit_interest_high %>% table %>% prop.table #Slightly more high and less medium. But this makes sample size for our group of interest slightly smaller.

shp_ti <- shp_ti %>% left_join(shp_ti_polit_int_avg)


# Regressions by political interest -------------------------------------------------------------------------

## In Ticino ---------------------------------------------------------------
# 
# #Normal regression without political interest subgroups

# Regression by subgroup
vars = c("satis_fin", "cmj_risk_unempl", "housingexp", "opin_foreign") #Political interest are at individual level so we include housing expenses here with the individual level variables

results_list <- lapply(vars, data = shp_ti, canton = "TI", political_interest_regression_function)

# Flatten the list of lists into a single list
flattened_results <- do.call(c, results_list)

# Add results to global environment
list2env(flattened_results, envir = .GlobalEnv)


# Cultural characteristics with political interest ------------------------
shp_ti <- shp_ti %>% 
  mutate(nationality = case_when(nation == 8100 ~ "Swiss",
                                 nation %in% c(8218, 8207, 8212) ~ "Other",
                                 nation %in% 8100:8600 ~ "Other",
                                 TRUE ~ NA_character_),
         nationality = factor(nationality, levels = c("Other", "Swiss")),
         religion = case_when(pr01 == 1 ~ "Other",
                              pr01 == 2 ~ "Roman Catholic",
                              pr01 == 8 ~ "Other",
                              pr01 > 0 ~ "Other",
                              TRUE ~ NA_character_),
         religion = factor(religion, levels = c("Other", "Roman Catholic")),
         religiosity = case_when(pr04 > 5 ~ "At least once a month",
                                 pr04 %in% 1:5 ~ "Other",
                                 TRUE ~ NA_character_
                                 ),
         religiosity = factor(religiosity, levels = c("Other", "At least once a month")),
         first_language = case_when(pe16 %in% c(1,2, 3, 4, 6) ~ "Other",
                                    pe16 %in% c(5,7) ~ "Italian",
                                    pe16 > 0 ~ "Other",
                                    TRUE ~ NA_character_),
         first_language = factor(first_language, levels = c("Other", "Italian")),
         Italian_speaker = case_when(first_language == "Italian" ~ 1,
                                     first_language == "Other" ~ 0,
                                     TRUE ~ NA_real_),
         polit_interest_cat = recode(polit_interest_cat, "low" = "Low", "med" = "Intermediate", "high" = "High"),
         polit_interest_cat = factor(polit_interest_cat, levels = c("Low", "Intermediate", "High")))

shp_ti$opin_foreign_std <- shp_ti$opin_foreign
shp_ti$opin_foreign_std[which(shp_ti$opin_foreign_std < 0)] <- NA
shp_ti$opin_foreign_std <- shp_ti$opin_foreign_std - mean(shp_ti$opin_foreign_std, na.rm=T)/sd(shp_ti$opin_foreign_std, na.rm = T)

shp_ti <- shp_ti %>% 
  mutate(period = case_when(year < 2000 ~ "Pre",
                            year < 2004 ~ "Trans",
                            year >= 2004 ~ "Post"))

shp_ti_italian_speaker <-  shp_ti %>% 
  group_by(idpers) %>% 
  summarise(Italian_speaker = mean(Italian_speaker, na.rm = T))

shp_ti_italian_speaker$Italian_speaker %>% table()

shp_ti <- shp_ti %>% dplyr::select(-Italian_speaker) %>% left_join(shp_ti_italian_speaker)

shp_ti <- shp_ti %>% mutate(first_language = case_when(Italian_speaker == 1 ~ "Italian",
                                                       Italian_speaker == 0 ~ "Other",
                                                       TRUE ~ NA_character_),
                            first_language = factor(first_language, levels = c("Other", "Italian"))
                            )

mod_culture_low <- felm(opin_foreign_std ~ transBorder15 * (religion + religiosity + nationality + first_language) + freeBorder15 * (religion + religiosity + nationality + first_language) | bfs19 + year + idpers | 0 | bfs19, 
     weights = shp_ti %>% filter(complete.cases(weight_i_pop), polit_interest_low == 1) %>% pull(weight_i_pop),
     data = shp_ti %>% filter(complete.cases(weight_i_pop), polit_interest_low == 1))

mod_culture_med <- felm(opin_foreign_std ~ transBorder15 * (religion + religiosity + nationality + first_language) + freeBorder15 * (religion + religiosity + nationality + first_language) | bfs19 + year + idpers | 0 | bfs19, 
     weights = shp_ti %>% filter(complete.cases(weight_i_pop), polit_interest_med == 1) %>% pull(weight_i_pop),
     data = shp_ti %>% filter(complete.cases(weight_i_pop), polit_interest_med == 1))

mod_culture_high <- felm(opin_foreign_std ~ transBorder15 * (religion + religiosity + nationality + first_language) + freeBorder15 * (religion + religiosity + nationality + first_language) | bfs19 + year + idpers | 0 | bfs19, 
     weights = shp_ti %>% filter(complete.cases(weight_i_pop), polit_interest_high == 1) %>% pull(weight_i_pop),
     data = shp_ti %>% filter(complete.cases(weight_i_pop), polit_interest_high == 1))



# Plots --------------------------------------------------------------------
## Perceived economic concerns ----------------------------------------------
df_plot <- coef.gg.prep(list(mod_satis_fin)) %>% mutate(outcome = "Financial Satisfaction") %>% 
  bind_rows(coef.gg.prep(list(mod_cmj_risk_unempl)) %>% mutate(outcome = "Unemployment Risk")) %>% 
  bind_rows(coef.gg.prep(list(mod_housingexp)) %>% mutate(outcome = "Housing Expenses"))

plot_econ_shp <- ggplot(df_plot, aes(x = modelcoef, y = outcome, xmin = ylo95, xmax = yhi95, color = variable, shape = variable, group = variable)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  ggstance::geom_linerangeh(position = ggstance::position_dodgev(.25)) +
  ggstance::geom_pointrangeh(aes(xmin = ylo90, xmax = yhi90), size = 1.25, position = ggstance::position_dodgev(.25)) +
  ylab("") +
  scale_color_manual(values = cbPalette[-3]) +
  xlab("Effect of Border Proximity on Perceived Economic Outcomes\n(Std. Deviations)") +
  theme_bw() + 
  theme(legend.position = "bottom",
        legend.background = element_rect(color = "black"),
        legend.margin = margin(4,4,4,4,unit = "pt"), 
        legend.title = element_blank())

ggsave(plot = plot_econ_shp, here("figures", "Fig6.pdf"), height = 5, width = 7)


## Opinion of foreigners by interest in Ticino ----------------------------------------------
df_plot_interest_ti <- coef.gg.prep(list(mod_opin_foreign_low_ti)) %>% mutate(outcome = "Low Political Awareness") %>% 
  bind_rows(coef.gg.prep(list(mod_opin_foreign_med_ti)) %>% mutate(outcome = "Intermediate Political Awareness")) %>% 
  bind_rows(coef.gg.prep(list(mod_opin_foreign_high_ti)) %>% mutate(outcome = "High Political Awareness")) %>% 
  mutate(outcome = factor(outcome, levels = c("High Political Awareness", "Intermediate Political Awareness", "Low Political Awareness")))

plot_interest_ti_shp <- ggplot(df_plot_interest_ti, aes(x = modelcoef, y = outcome, xmin = ylo95, xmax = yhi95, color = variable, shape = variable, group = variable)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  ggstance::geom_linerangeh(position = ggstance::position_dodgev(.25)) +
  ggstance::geom_pointrangeh(aes(xmin = ylo90, xmax = yhi90), size = 1.25, position = ggstance::position_dodgev(.25)) +
  ylab("") +
  scale_color_manual(values = cbPalette[-3]) +
  xlab("Effect of Border Proximity on Favoring Swiss\n(Std. Deviations)") +
  theme_bw() + 
  theme(legend.position = "bottom",
        legend.background = element_rect(color = "black"),
        legend.margin = margin(4,4,4,4,unit = "pt"), 
        legend.title = element_blank())
ggsave(plot = plot_interest_ti_shp, here("figures", "Fig10.pdf"), height = 5, width = 7)



# Tables ------------------------------------------------------------------
## Economic and other outcomes entire sample ------------------------------
Texreg(
  list(mod_satis_fin, mod_cmj_risk_unempl, mod_housingexp, mod_opin_taxation),
  custom.model.names = c("Financial Satisfaction", "Unemployment Risk", "Accommodation Expenses", "Favor Tax Reduction"),
  custom.gof.rows = list("Weights" = c("Yes", "Yes", "Yes", "Yes")),
  caption = "Effects on Perceived Economic Outcomes. Sample: Border Region, Travel Minutes $<=$ 30. Data: The Swiss Household Panel Survey. Regressions Include Individual (Household), Municipality, and Year Fixed Effects.",
  label = "tab:shp_econ",
  file = here("tables", "shp_econ.tex")
)

Texreg(
  list(mod_opin_army, mod_trust_gov, mod_satis_demo, mod_opin_eu),
  custom.model.names = c("Favor no Army", "Trust in Government", "Satisfaction with Democracy", "Favor Staying out of EU"),
  custom.gof.rows = list("Weights" = c("Yes", "Yes", "Yes", "Yes")),
  caption = "Effect on Alternative Outcomes. Model 1: Effect on Opinion Toward the Swiss Army (Lower Favors a Strong Army, Higher Favors No Army). Model 2: Effect on Trust in the Federal Government (Higher Indicates More Confidence). Model 3: Effect on Overall Satisfaction with Democracy (Higher Indicates More Satisfaction). Model 4: Effect on Staying Out of the EU (Higher Indicates Preference for Non-membership). Sample: Border Region, Travel Minutes $<=$ 30. Data: The Swiss Household Panel Survey. Regressions Include Individual, Municipality, and Year Fixed Effects.",
  label = "tab:shp_other_outcomes",
  file = here("tables", "shp_other_outcomes.tex")
)

## Political interest ------------------------------
Texreg(
  list(mod_opin_foreign_low_ti, mod_stable_opin_foreign_low_ti, mod_opin_foreign_med_ti, mod_stable_opin_foreign_med_ti, mod_opin_foreign_high_ti, mod_stable_opin_foreign_high_ti),
  custom.header = list("Low awareness" = 1:2, "Intermediate awareness" = 3:4, "High awareness" = 5:6),
  custom.gof.rows = list("Political awareness groups" = c(rep(c("Changing", "Constant"), 3)),
                         "Weights" = c(rep("Yes", 6))),
  caption = "Effects on Favoring Better Opportunities for Swiss Citizens by Political Awareness Groups in Ticino. Political awareness groups use measures that vary by survey wave (changing) or are determined by the average across all available survey waves (constant). Sample: Border region in Ticino, Travel Minutes $<=$ 30. Data: The Swiss Household Panel Survey. Regressions include individual, municipality, and year fixed effects.",
  label = "tab:shp_foreigners_subgroups_ti",
  file = here("tables", "shp_foreigners_subgroups_ti.tex")
  )

Texreg(
  list(mod_culture_low, mod_culture_med, mod_culture_high),
  custom.header = list("Low awareness" = 1, "Intermediate awareness" = 2, "High awareness" = 3),
  custom.gof.rows = list("Weights" = rep("Yes", 3)),
  omit.coef = "first_languageItalian",
  custom.coef.map = list(
    "transBorder15" = "0--15 Minutes X Transition",
    "freeBorder15" = "0--15 Minutes X Free",
    "religionRoman Catholic" = "Roman Catholic",
    "nationalitySwiss" = "Swiss Nationality",
    #"first_languageItalian" = "First Language: Italian",
    "religiosityAt least once a month" = "Religious services: at least once a month",
    "transBorder15:religionRoman Catholic" = "0--15 Minutes X Transition X Roman Catholic",
    "transBorder15:religiosityAt least once a month" = "0--15 Minutes X Transition X Religious services: at least once a month",
    "transBorder15:nationalitySwiss" = "0--15 Minutes X Transition X Swiss Nationality",
    "transBorder15:first_languageItalian" = "0--15 Minutes X Transition X First Language: Italian"
    ),
  caption = "Effects on Favoring Better Opportunities for Swiss Citizens by Political Awareness Groups in Ticino Interacted with Cultural Characteristics.
  Sample: Border region in Ticino, Travel Minutes $<=$ 30. 
  Political awareness groups use measures that vary by survey wave. 
  Missing values for language were imputed using the mean value for each respondent (since it is mostly constant at the respondent level).
  Data: The Swiss Household Panel Survey. Regressions include individual, municipality, and year fixed effects.",
  label = "tab:shp_foreigners_subgroups_cultural.tex",
  file = here("tables", "shp_foreigners_subgroups_cultural.tex")
)





## Placebo Ticino ------------------------------
Texreg(
  list(mod_satis_fin_low_ti, mod_stable_satis_fin_low_ti, mod_satis_fin_med_ti, mod_stable_satis_fin_med_ti, mod_satis_fin_high_ti, mod_stable_satis_fin_high_ti),
  custom.header = list("Low awareness" = 1:2, "Intermediate awareness" = 3:4, "High awareness" = 5:6),
  custom.gof.rows = list("Political awareness groups" = c(rep(c("Changing", "Constant"), 3)),
                         "Weights" = c(rep("Yes", 6))),
  caption = "Effects on Financial Satisfaction by Political Awareness Groups in Ticino. Sample: Border region in Ticino, Travel Minutes $<=$ 30. Data: The Swiss Household Panel Survey. Regressions include individual, municipality, and year fixed effects.",
  label = "tab:shp_financial_subgroups",
  file = here("tables", "shp_financial_subgroups.tex")
)

Texreg(
  list(mod_cmj_risk_unempl_low_ti, mod_stable_cmj_risk_unempl_low_ti, mod_cmj_risk_unempl_med_ti, mod_stable_cmj_risk_unempl_med_ti, mod_cmj_risk_unempl_high_ti, mod_stable_cmj_risk_unempl_high_ti),
  custom.header = list("Low awareness" = 1:2, "Intermediate awareness" = 3:4, "High awareness" = 5:6),
  custom.gof.rows = list("Political awareness groups" = c(rep(c("Changing", "Constant"), 3)),
                         "Weights" = c(rep("Yes", 6))),
  caption = "Effects on Unemployment Risk by Political Awareness Groups in Ticino. Sample: Border Region in Ticino, Travel Minutes $<=$ 30. Data: The Swiss Household Panel Survey. Regressions include individual, municipality, and year fixed effects.",
  label = "tab:shp_unempl_subgroups",
  file = here("tables", "shp_unempl_subgroups.tex")
)

Texreg(
  list(mod_housingexp_low_ti, mod_stable_housingexp_low_ti, mod_housingexp_med_ti, mod_stable_housingexp_med_ti, mod_housingexp_high_ti, mod_stable_housingexp_high_ti),
  custom.header = list("Low awareness" = 1:2, "Intermediate awareness" = 3:4, "High awareness" = 5:6),
  custom.gof.rows = list("Political awareness groups" = c(rep(c("Changing", "Constant"), 3)),
                         "Weights" = c(rep("Yes", 6))),
  caption = "Effects on Housing Expenses by Political Awareness Groups in Ticino. Sample: Border Region in Ticino, Travel Minutes $<=$ 30. Data: The Swiss Household Panel Survey. Regressions include individual, municipality, and year fixed effects.",
  label = "tab:shp_housingexp_subgroups",
  file = here("tables", "shp_housingexp_subgroups.tex")
)

